import requests
import re

# Root URL of the site being scraped; reused below as the Referer header.
domain = "https://www.dytt89.com/"

# The site's home page announces its character set in a meta tag:
#   <META http-equiv=Content-Type content="text/html; charset=gb2312">

# Browser-like headers sent with the home-page request.
header = {}
header["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
header["Cookie"] = "Hm_lvt_93b4a7c2e07353c3853ac17a86d4c8a4=1711942771; Hm_lvt_8e745928b4c636da693d2c43470f5413=1711942771; Hm_lvt_0113b461c3b631f7a568630be1134d3d=1711942771; Hm_lpvt_93b4a7c2e07353c3853ac17a86d4c8a4=1711943398; Hm_lpvt_0113b461c3b631f7a568630be1134d3d=1711943398; Hm_lpvt_8e745928b4c636da693d2c43470f5413=1711943398"
header["Referer"] = domain

# Alternative, minimal header set carrying only an Edge User-Agent.
header2 = {}
header2["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"

# Download the home page.
# verify=False disables TLS certificate verification.
# timeout bounds the wait for the server; without it requests.get can block
# indefinitely if the site stops responding.
resp = requests.get(domain, verify=False, headers=header, timeout=10)
# Decode the body with gb2312 so that resp.text is not garbled by a wrongly
# guessed character set.
resp.encoding = "gb2312"
print(resp.text)

# obj1: captures (as group "ul") the contents of the first <ul> that follows
# the "2024必看热片" heading text. re.S lets "." span line breaks.
obj1 = re.compile(r"2024必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)
# obj2: captures (as group "href") the target of each single-quoted <a href='...'>.
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)
# obj3: on a detail page, captures the movie title (group "movie") and the
# href of the link inside the styled <td> cell (group "download").
# Adjacent string literals are joined with nothing in between, so the
# whitespace separating "<td" from "style" has to be spelled out in the
# pattern itself; "\s+" also tolerates a line break or several spaces there.
obj3 = re.compile(
    r'◎片　　名　(?P<movie>.*?)<br />.*?<td\s+'
    r'style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<download>.*?)">',
    re.S,
)

# Collect the absolute URL of every detail page linked from the matched <ul>.
child_href_list = []
for section in obj1.finditer(resp.text):
    # Walk every link found inside this <ul> fragment.
    for anchor in obj2.finditer(section.group('ul')):
        # Detail-page URL = site root + link path with its outer slashes removed.
        relative_path = anchor.group('href').strip("/")
        child_href = domain + relative_path
        child_href_list.append(child_href)  # remember it for the second pass

# Fetch each detail page and print its movie title and download link.
for href in child_href_list:
    # timeout keeps one unresponsive page from stalling the whole run.
    child_resp = requests.get(href, verify=False, timeout=10)
    # The encoding must be set on the response object (not on the URL string)
    # so that child_resp.text is decoded as gb2312.
    child_resp.encoding = 'gb2312'
    result3 = obj3.search(child_resp.text)
    if result3 is None:
        # Page layout did not match the pattern; report it and move on
        # instead of failing on None.group(...).
        print("no movie info found on", href)
        continue
    print(result3.group("movie"))
    print(result3.group("download"))
    # break  # uncomment to stop after the first page while testing
